# GSS_coding_for_imputed_datasets.R

# Part of the replication archive for 
#
#   Bullock, John G. 2020. "Education and Attitudes toward Redistribution in
#   the United States." British Journal of Political Science 50.


# This file is for use after datasets have been created by
# GSS_create_imputed.R.  It merges data (e.g., state-year control variables)
# into the imputed datasets.  

library(Bullock, lib.loc = c(.libPaths(), 'packageLibrary'))   # for split_fac(), sumNA()
library(car)       # for Recode()
library(dplyr)     # for %>%, select()
library(haven)     # for read_spss()
library(tools)     # for toTitleCase()

source('CSL_coding.R')          
source('functions/mergeStateControlVars.R')


# LOAD IMPUTED DATASETS
# Each .RData file is loaded into a dedicated environment, so the objects it
# contains are reached as <environment>$<object> rather than being placed
# in the calling workspace.
GSSEqwlth <- new.env()
load('data/GSSImputedDatasetEqwlth.RData', envir = GSSEqwlth)

GSSGoveqinc <- new.env()
load('data/GSSImputedDatasetGoveqinc.RData', envir = GSSGoveqinc)

GSSHelpPoor <- new.env()
load('data/GSSImputedDatasetHelppoor.RData', envir = GSSHelpPoor)

GSSWelfare <- new.env()
load('data/GSSImputedDatasetWelfare.RData', envir = GSSWelfare)

# LOAD MAIN GSS DATASET TO ADD SOME VARIABLES LATER ON
# Step 1: read the saved dataset.  Step 2: coerce respondentID to integer.
GSS <- readRDS('data/GSS_withMergedCSLs.RDS')
GSS <- mutate(GSS, respondentID = as.integer(respondentID))




########################################################
# RECODE VARIABLES AND ADD VARIABLES TO IMPUTED DATASETS
########################################################
# Columns of the main GSS dataset that are joined onto every imputed dataset
# in the loop below; respondentID and yearInt are the join keys there.
dfForMerging <- select(
  GSS,
  respondentID, yearInt,
  yearYoung, stateYoung, state.contemp, YOB)

# For each dependent variable: take the list of imputed datasets that was
# loaded for it, add recoded and merged variables to every dataset in the list,
# and save the augmented list to data/GSSImputedDataset<Depvar>_coded.RDS.
#
# Objects used here that are defined outside this block:
#   GSSEqwlth, GSSGoveqinc, GSSHelpPoor, GSSWelfare -- environments holding the
#     loaded imputation objects
#   dfForMerging -- respondent-level variables taken from the main GSS dataset
#   CSLdata, mergeStateControlVars(), %IN%, qw() -- not defined in this file;
#     presumably supplied by CSL_coding.R, functions/mergeStateControlVars.R,
#     and the Bullock package (TODO confirm)

# Loop-invariant inputs, computed once rather than once per imputed dataset.
statesForBlackPostBrown <- c(
  'AL', 'AR', 'DE', 'DC', 'FL', 'GA', 'KY', 'LA', 'MD', 'MS', 'NC', 'OK',
  'SC', 'TN', 'TX', 'VA', 'WV')
cslInstruments <- CSLdata[, qw("state year CA")]

for (depvar in c('eqwlth', 'goveqinc', 'helppoor', 'welfare')) {
  # Pick the imputations that belong to this dependent variable.  The default
  # branch guards against a stale `imputations` object being reused silently.
  imputations <- switch(
    depvar,
    eqwlth   = GSSEqwlth$eqwlth.out$imputations,
    goveqinc = GSSGoveqinc$goveqinc.out$imputations,
    helppoor = GSSHelpPoor$helppoor.out$imputations,
    welfare  = GSSWelfare$welfare.out$imputations,
    stop('Unknown dependent variable: ', depvar, call. = FALSE))

  # Fail early with a clear message instead of saving an empty result.
  if (length(imputations) == 0) {
    stop('No imputed datasets found for ', depvar, call. = FALSE)
  }

  # seq_along() is safe for any list length, unlike 1:length().
  for (impNum in seq_along(imputations)) {
    imputedDataset <- left_join(
      imputations[[impNum]],
      dfForMerging,
      by = c('respondentID', 'yearInt'))

    # Top-code educ at 13, then derive the remaining respondent-level variables.
    imputedDataset$educ        <- pmin(imputedDataset$educ, 13)
    imputedDataset$HSgrad      <- imputedDataset$educ >= 12
    imputedDataset$yearInt.fac <- ordered(Recode(imputedDataset$yearInt, '1994 = 1996') )
    imputedDataset$race        <- Recode(
      droplevels(imputedDataset$race),
      '"WHITE"="white"; "BLACK"="black"; "OTHER"="otherRace"')
    imputedDataset$race        <- relevel(imputedDataset$race, 'white')
    imputedDataset$black       <- imputedDataset$race == 'black'
    imputedDataset$white       <- imputedDataset$race == 'white'
    imputedDataset$yearYoungNorm <- imputedDataset$yearYoung - 1962  # GSS median is 1962

    # Segregation-related measures [2016 06 23]
    imputedDataset$blackPostBrown <-
      imputedDataset$race == 'black'   &
      imputedDataset$yearYoung >= 1958 &
      imputedDataset$stateYoung %IN% statesForBlackPostBrown
    imputedDataset$MSDuringRepeal <-
      imputedDataset$yearYoung %IN% 1957:1982 &
      imputedDataset$stateYoung == 'MS' &
      imputedDataset$race == 'white'
    imputedDataset$SCDuringRepeal <-
      imputedDataset$yearYoung %IN% 1956:1971 &
      imputedDataset$stateYoung == 'SC' &
      imputedDataset$race == 'white'
    # Both components are already restricted to race == 'white', so no further
    # race condition is needed here.
    imputedDataset$duringRepeal <-
      imputedDataset$MSDuringRepeal | imputedDataset$SCDuringRepeal


    ##########################################################################
    # MERGE STATE-LEVEL CHARACTERISTICS INTO IMPUTED DATASET
    ##########################################################################
    imputedDataset <- bind_cols(
      imputedDataset,
      mergeStateControlVars(
        imputedDataset$stateYoung,
        imputedDataset$yearYoung))


    ##############################################################
    # MERGE CSL INSTRUMENTS INTO IMPUTED DATASET
    ##############################################################
    imputedDataset <- left_join(
      x  = imputedDataset,
      y  = cslInstruments,
      by = c("stateYoung" = "state", "yearYoung" = "year"))

    # Three bins of CA: (-100, 7], (7, 10], (10, 100].
    imputedDataset$CA.fac <- cut(imputedDataset$CA, c(-100, 7, 10, 100))

    # OVERWRITE THE ORIGINAL IMPUTED DATASET
    imputations[[impNum]] <- imputedDataset
  }


  ############################################################################
  # SAVE CSL-MERGED DATASET
  ############################################################################
  filenameOutput <- paste0('data/GSSImputedDataset', toTitleCase(depvar), '_coded.RDS')
  cat('Saving ', filenameOutput, "\n", sep = '')
  saveRDS(imputations, file = filenameOutput)

}
